First we read in the processed dataset.
suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(scater))
suppressPackageStartupMessages(library(edgeR))
suppressPackageStartupMessages(library(limma))
# load("../RData/20180725_allFT_Clincluster_12clusters.RData")
sceset <- readRDS("../rds/20180725_allFT_Clincluster_12clusters_sceset.rds")
secretory <- readRDS("../rds/20180910Fresh_secretory_5clusters_clincluster.rds")
# dim(sceset)
# secretory <- sceset[,sceset$final.clusters %in% c(8,9,10) & sceset$source == "Fresh" & sceset$Patient != "15066L"] #
# secretory <- secretory[,logcounts(secretory)["KRT7",] > 2 &
# logcounts(secretory)["EPCAM",] > 2 &
# logcounts(secretory)["PTPRC",] == 0 &
# logcounts(secretory)["CCDC17",] < 1 ]
FTE <- sceset[, !(sceset$final.clusters %in% c(4,6)) & sceset$Patient != "15066L"]
FTE <- FTE[,logcounts(FTE)["EPCAM",] > 2 & logcounts(FTE)["PTPRC",] == 0]
plotTSNE(FTE, colour_by = "source")
plotTSNE(FTE, colour_by = "final.clusters")
P11553 <- FTE[,FTE$Patient2 == "11553"]
P15072 <- FTE[,FTE$Patient2 == "15072"]
P15072$source[P15072$source == "2-day cultured"] <- "6-day cultured"
p1 <- plotTSNE(P11553, colour_by = "source")
p2 <- plotTSNE(P11553, colour_by = "final.clusters")
p3 <- plotTSNE(P15072, colour_by = "source")
p4 <- plotTSNE(P15072, colour_by = "final.clusters")
cowplot::plot_grid(p1,p2,p3,p4)
P15072 <- P15072[, !(P15072$final.clusters %in% c(5,11))]
plotPCA(P15072, colour_by = "source")
matrix <- expm1(logcounts(P15072))
keep <- rowSums(matrix > 1) > 5
sum(keep)
## [1] 13820
dge <- edgeR::DGEList(counts = matrix[keep,]) # make a edgeR object
rm(matrix,keep)
design <- model.matrix(~ 0 + source, data = P15072@colData) # Use 0 because we do not need intercept for this linear model
colnames(design)
## [1] "source6-day cultured" "sourceFresh" "sourceO.N. cultured"
colnames(design) <- gsub(pattern = " cultured", replacement = "", x = colnames(design))
colnames(design) <- gsub(pattern = "-", replacement = "", x = colnames(design))
# Transform count data to log2-counts per million (logCPM), estimate the mean-variance relationship and use this to compute appropriate observation-level weights. The data are then ready for linear modelling.
v <- voom(dge, design, plot = TRUE)
fit <- lmFit(v, design) # Linear Model for Series of Arrays
colnames(design)
## [1] "source6day" "sourceFresh" "sourceO.N."
cont.matrix <- makeContrasts("sourceO.N.-sourceFresh",
"source6day-sourceFresh",
"source6day-sourceO.N.",
levels = design)
fit2 <- contrasts.fit(fit, cont.matrix) # Compute Contrasts from Linear Model Fit
fit2 <- eBayes(fit2) #Empirical Bayes Statistics for Differential Expression
rls <- topTable(fit2, n = Inf, coef = 1, sort = "logFC", lfc = 1, p = 0.05)
rls$gene <- rownames(rls)
# v_expr <- logcounts(secretory)[rownames(rls), secretory$initial.cluster == rownames(n_deg2)[i]]
# rls$ratio1 <- rowSums(v_expr > 0.5)/ncol(v_expr)
# v_expr <- logcounts(secretory)[rownames(rls), secretory$initial.cluster == colnames(n_deg2)[j]]
# rls$ratio2 <- rowSums(v_expr > 0.5)/ncol(v_expr)
fresh_up_ON <- rls[rls$logFC > 1,]
fresh_down_ON <- rls[rls$logFC < -1,]
rls <- topTable(fit2, n = Inf, coef = 2, sort = "logFC", lfc = 1, p = 0.05 )
rls$gene <- rownames(rls)
fresh_up_6d <- rls[rls$logFC > 1,]
fresh_down_6d <- rls[rls$logFC < -1,]
rls <- topTable(fit2, n = Inf, coef = 3, sort = "logFC", lfc = 1, p = 0.05 )
rls$gene <- rownames(rls)
ON_up_6d <- rls[rls$logFC > 1,]
ON_down_6d <- rls[rls$logFC < -1,]
sum(fresh_up_ON$gene %in% fresh_up_6d$gene)
## [1] 1338
P15072$source <- factor(P15072$source, levels = c("Fresh","O.N. cultured", "6-day cultured"))
plotExpression(P15072, features = head(fresh_up_ON$gene,12), x = "source", ncol = 3)
plotExpression(P15072, features = head(fresh_down_ON$gene[fresh_down_ON$gene %in% ON_down_6d$gene],15), x = "source", ncol = 3, colour_by = "source")
CRISP3 is expressed highly in protein atlas. NR4A1 is expressed in fallopian tube. High Expression of Orphan Nuclear Receptor NR4A1 in a Subset of Ovarian Tumors with Worse Outcome (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5154956/)
SERPING1 is special. It is rarely expressed in normal FT but expressed in OvCa.
plotExpression(P15072, features = c("PTGS1","SERPING1"), x = "source")
# write.table(rownames(dge), quote = F, sep = "\n", file = "../GOrilla input/20180914_bg_genes.txt", row.names = F, col.names = F)
# write.table(fresh_up_ON$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_up_ON_DEGs.txt", row.names = F, col.names = F)
# write.table(fresh_down_ON$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_down_ON_DEGs.txt", row.names = F, col.names = F)
# write.table(fresh_up_6d$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_up_6d_DEGs.txt", row.names = F, col.names = F)
# write.table(fresh_down_6d$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_down_6d_DEGs.txt", row.names = F, col.names = F)
# write.table(ON_down_6d$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_ON_down_6d_DEGs.txt", row.names = F, col.names = F)
# write.table(ON_up_6d$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_ON_up_6d_DEGs.txt", row.names = F, col.names = F)
# write.table(fresh_up_ON$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_up_ON_DEGs.txt", row.names = F, col.names = F)
# write.table(fresh_down_ON$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_down_ON_DEGs.txt", row.names = F, col.names = F)
Downregulated GOs compared to Fresh:
dim(fresh_up_ON)
## [1] 2006 7
dim(fresh_down_ON)
## [1] 438 7
dim(fresh_up_6d)
## [1] 2856 7
dim(fresh_down_6d)
## [1] 407 7
dim(ON_up_6d)
## [1] 1425 7
dim(ON_down_6d)
## [1] 551 7
tmp <- venn::venn(list(fUPon=fresh_up_ON$gene,
fUP6d=fresh_up_6d$gene,
onUP6d= ON_up_6d$gene),
ilab=TRUE, zcolor = "style")
# attr(tmp, "intersections")
tmp <- venn::venn(list(fDOWNo = fresh_down_ON$gene,
fDown6 = fresh_down_6d$gene,
oDOWN6= ON_down_6d$gene),
ilab=TRUE,
zcolor = "style")
# attr(tmp, "intersections")
tmp <- venn::venn(list(fresh_down_ON = fresh_down_ON$gene,
ON_up_6d= ON_up_6d$gene),
ilab=TRUE,
zcolor = "style")
tmp2 <- attr(tmp, "intersections")
tmp2$`fresh_down_ON:ON_up_6d`
## [1] "FOS" "CRIP2" "GAS6" "UCP2" "NGFRAP1"
## [6] "ASAH1" "IFITM1" "RNASET2" "LY6E" "APOBEC3C"
## [11] "CTGF" "ECH1" "GSN" "CAT" "HMGN3"
## [16] "FOLR1" "LGALS3BP" "CTSH" "HMGN2" "IFITM2"
## [21] "PTGR1" "CTSD" "MAGED1" "PHPT1" "ADIRF"
## [26] "TMEM98" "ATRAID" "MGMT" "IFITM3" "CLDN10"
## [31] "BDH2" "CMTM6" "TECR" "SHMT1" "GSTK1"
## [36] "CST3" "KLK11" "HSPB1" "ETFB" "ATP5G2"
## [41] "UBB" "ALDH7A1" "ATPIF1" "PON2" "HMGB2"
## [46] "FXYD3" "RCN2" "AKR7A2" "DYNLL1" "SUMF2"
## [51] "PRDX2" "GSTT1" "EMP2" "KRTCAP3" "GRN"
## [56] "SEPW1" "BCKDHA" "SYNE2" "SORBS2" "ALKBH7"
## [61] "SH3BGRL" "QPRT" "CCNG1" "ECHDC2" "DNPH1"
## [66] "NDUFA13" "PRDX3" "TOMM7" "C19orf60" "PDIA4"
## [71] "PEBP1" "IDH2" "MGST2" "PLLP" "PRCP"
## [76] "NINJ1" "ADI1" "TSPAN1" "TMEM66" "CRIP1"
## [81] "LSMD1" "PEMT" "TTC3" "HINT2" "CETN2"
## [86] "SUMF1" "MGST3" "ERGIC3" "ASRGL1" "TSPAN6"
## [91] "APLP2" "CD24" "ISOC1" "TMEM230" "ASS1"
## [96] "GLG1" "COX5B" "ECHS1" "TMEM219" "LOC440335"
## [101] "MYL9" "SCRN2" "PSMB9" "CPVL" "FKBP9"
## [106] "LRPAP1" "C9orf16" "UQCRQ" "TMEM9" "PDLIM1"
## [111] "HIGD2A" "TXN2" "PSME1" "TRIM22" "CALCOCO1"
## [116] "COA3" "TMEM14A" "DHRS7" "SCP2" "MFAP2"
## [121] "BSG" "GPX3" "CTSC" "HADH" "ANKRD65"
## [126] "SUCLG2" "NDUFV1" "GEMIN8" "SPON1" "MARCKSL1"
## [131] "AGR2" "OST4" "SMPDL3B" "ALDH3B1" "IFI6"
## [136] "SYK" "LAMB2" "MT2A" "FAM127A" "LBH"
## [141] "EID1" "DGCR6L" "MAP1LC3B" "PBXIP1"
plotExpression(P15072, features = head(tmp2$`fresh_down_ON:ON_up_6d`), x = "source", ncol = 2, colour_by = "source")
p1 <- plotTSNE(P15072, colour_by = c("GAS6"))
p2 <- plotTSNE(P15072, colour_by = c("CRIP2"))
p3 <- plotTSNE(P15072, colour_by = c("FOS"))
p4 <- plotTSNE(P15072, colour_by = c("UCP2"))
cowplot::plot_grid(p1,p2,p3,p4, ncol = 2)
# write.table(tmp2$`fresh_down_ON:ON_up_6d`, quote = F, sep = "\n", file = "../GOrilla input/20180914fresh_down_ON:ON_up_6d.txt", row.names = F, col.names = F)
tmp <- venn::venn(list(fresh_up_ON = fresh_up_ON$gene,
ON_down_6d= ON_down_6d$gene),
ilab=TRUE,
zcolor = "style")
tmp2 <- attr(tmp, "intersections")
tmp2$`fresh_up_ON:ON_down_6d`[1:20]
## [1] "AXL" "MARS" "CDCP1" "CD44" "SLC1A5" "PSMD12"
## [7] "TNFRSF21" "EMP3" "STIP1" "FOSL1" "HSPA4" "TXNRD1"
## [13] "NOP16" "ADPRHL2" "GRWD1" "CADM1" "GTPBP4" "UPP1"
## [19] "PSMD11" "ANXA5"
# write.table(tmp2$`fresh_up_ON:ON_down_6d`, quote = F, sep = "\n", file = "../GOrilla input/20180914fresh_up_ON:ON_down_6d.txt", row.names = F, col.names = F)
Choose some GOs for heatmap
# library(tidyverse)
gos <- readxl::read_excel(path = "../Gorilla output/20180914culturing_GOs.xlsx", sheet = "Summary")
head(gos )
## # A tibble: 6 x 11
## X__1 `GO Term` Description `P-value` `FDR q-value` Enrichment N
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 <NA> GO:00193… fatty acid… 7.65e- 6 5.44e- 3 9.63 12808
## 2 up_ON GO:00063… RNA proces… 1.97e-32 1.40e-28 2.03 12808
## 3 <NA> GO:00511… localizati… 6.72e-16 4.35e-13 1.28 12808
## 4 <NA> GO:00023… immune sys… 7.39e- 8 1.06e- 5 1.33 12808
## 5 <NA> GO:00329… secretion … 2.51e- 6 2.49e- 4 1.43 12808
## 6 <NA> GO:00017… cell activ… 6.23e- 6 5.76e- 4 1.4 12808
## # ... with 4 more variables: B <dbl>, n <dbl>, b <dbl>, Genes <chr>
go_genes <- gos$Genes
go_genes <- unlist(strsplit(go_genes , split = ", "))
go_genes <- unique(go_genes)
go_genes <- go_genes[grep(" - ", go_genes )]
go_genes <- sapply(go_genes, function(x) return(unlist(strsplit(x, " - "))[1]))
names(go_genes) <- NULL
go_genes <- unlist(go_genes)
go_genes <- gsub(pattern = "[[]", replacement = "", x = go_genes)
go_genes <- unique(go_genes)
go_genes <- data.frame(gene = go_genes,
GO = sapply(go_genes, function(x) return(gos$Description[grep(x, gos$Genes)][1] )) )
go_genes <- go_genes[order(go_genes$GO, decreasing = F),]
head(go_genes)
## gene GO
## GCLM GCLM cell activation
## GNB1 GNB1 cell activation
## PRMT5 PRMT5 cell activation
## TOP1 TOP1 cell death
## SERPINB9 SERPINB9 cell death
## NR3C1 NR3C1 cell death
data.plot <- logcounts(P15072)[match(go_genes$gene, rownames(P15072)), ]
colanno <- data.frame(source = P15072$source)
rownames(colanno) <- colnames(data.plot)
colanno <- colanno[order(colanno$source, decreasing = F),, drop = F]
data.plot <- data.plot[,rownames(colanno)]
data.plot <- data.plot[rowSums(data.plot > 0.5) >= 5,]
data.plot <- t(scale(t(data.plot), center = T, scale = T))
data.plot <- Seurat::MinMax(data.plot , min = -2.5, max = 2.5)
data.plot[1:10,1:10]
## 15072L-p2-A02 15072L-p2-A03 15072L-p2-A04 15072L-p2-A05
## GCLM -0.7293457 -0.7293457 -0.72934566 -0.7293457
## GNB1 2.5000000 -0.7045463 2.33193383 -0.7045463
## PRMT5 -0.9495120 1.0722717 -0.94951202 -0.9495120
## TOP1 -1.0185774 -1.0185774 -1.01857741 -1.0185774
## SERPINB9 -0.7458360 1.1165037 0.33882419 -0.7458360
## NR3C1 -0.4094784 -0.5410116 -0.54101161 -0.5410116
## GPR64 -0.5160526 1.6603375 2.34135340 -0.5160526
## SEMA4A 1.2619586 -0.3895052 -0.08719182 -0.4155336
## ITM2B 0.1855766 -0.1921770 0.44632012 -0.5670452
## CPE -0.4688120 -0.4688120 2.22818229 -0.4688120
## 15072L-p2-A09 15072L-p2-A11 15072L-p2-A12 15072L-p2-A18
## GCLM -0.7293457 0.31798348 0.7626400 -0.729345660
## GNB1 -0.7045463 -0.04305448 -0.7045463 -0.704546262
## PRMT5 -0.9495120 1.14490276 -0.9495120 -0.949512016
## TOP1 -1.0185774 0.92745022 1.0694756 -0.283151233
## SERPINB9 -0.5227650 1.49784182 -0.7458360 1.569712400
## NR3C1 -0.5410116 -0.54101161 -0.5410116 2.424453519
## GPR64 -0.5160526 1.16193540 -0.5160526 -0.516052597
## SEMA4A -0.4817796 -0.48177956 -0.4817796 -0.481779562
## ITM2B 0.4871042 0.19476472 -0.2840381 0.322706423
## CPE 2.2811520 -0.46881200 -0.4688120 0.006598591
## 15072L-p2-B01 15072L-p2-B02
## GCLM -0.7293457 -0.7293457
## GNB1 -0.7045463 -0.7045463
## PRMT5 -0.9495120 -0.9495120
## TOP1 0.9186643 -1.0185774
## SERPINB9 1.1743149 0.9853121
## NR3C1 -0.5410116 2.3726608
## GPR64 -0.5160526 -0.5160526
## SEMA4A -0.4817796 -0.4817796
## ITM2B 0.1821424 0.5158808
## CPE -0.3299672 -0.4688120
go_genes <- go_genes[go_genes$gene %in% rownames(data.plot),, drop = F]
my_colours <- colorRampPalette(c("steelblue4", "white", "firebrick2"))(200)
pheatmap::pheatmap(data.plot[as.character(go_genes$gene[grep("response", go_genes$GO)]),], color = my_colours,
show_rownames = T,
show_colnames = F,
cluster_rows = T, cluster_cols = F,annotation_col = colanno)
dim(data.plot)
## [1] 1431 538
table(go_genes$GO)
##
## cell activation
## 3
## cell death
## 3
## cell surface receptor signaling pathway
## 19
## fatty acid metabolic process
## 4
## fatty acid oxidation
## 7
## immune system process
## 90
## localization
## 611
## mitotic cell cycle process
## 81
## negative regulation of programmed cell death
## 10
## programmed cell death
## 61
## regulation of cell migration
## 27
## regulation of cell proliferation
## 18
## regulation of immune system process
## 14
## regulation of signal transduction
## 47
## response to stimulus
## 167
## RNA processing
## 265
## secretion by cell
## 1
pheatmap::pheatmap(data.plot[as.character(go_genes$gene[grep("immune", go_genes$GO)]),], color = my_colours,
show_rownames = T,
show_colnames = F,
cluster_rows = F, cluster_cols = F,annotation_col = colanno)
pheatmap::pheatmap(data.plot[as.character(go_genes$gene[grep("fatty acid", go_genes$GO)]),], color = my_colours,
show_rownames = T,
show_colnames = F,
cluster_rows = F, cluster_cols = F,annotation_col = colanno)
pheatmap::pheatmap(data.plot[as.character(go_genes$gene[grep("cell death", go_genes$GO)]),], color = my_colours,
show_rownames = T,
show_colnames = F,
cluster_rows = F, cluster_cols = F,annotation_col = colanno)
Initiation of centresome duplication CDK2 CDK4 CDK6 PLK2 PLK6 AURKA
MAD2L1 is a component of the mitotic spindle assembly checkpoint that prevents the onset of anaphase until all chromosomes are properly aligned at the metaphase plate.
CDKN1A/p21
This gene encodes a potent cyclin-dependent kinase inhibitor. The encoded protein binds to and inhibits the activity of cyclin-cyclin-dependent kinase2 or -cyclin-dependent kinase4 complexes, and thus functions as a regulator of cell cycle progression at G1. The expression of this gene is tightly controlled by the tumor suppressor protein p53, through which this protein mediates the p53-dependent cell cycle G1 phase arrest in response to a variety of stress stimuli.
plotTSNE(P15072, colour_by = c("CDKN1A"))
Diffusion map is not working well
# P15072 <- P15072[, !(P15072$final.clusters %in% c(5,11))]
# # source("https://bioconductor.org/biocLite.R")
# # biocLite("destiny")
# library(ggthemes)
# library(destiny)
# cellLabels <- colData(P15072)$source
# diff.map <- logcounts(P15072)
# colnames(diff.map) <- cellLabels
# dm <- DiffusionMap(t(diff.map))
#
# tmp <- data.frame(DC1 = eigenvectors(dm)[,1],
# DC2 = eigenvectors(dm)[,2],
# Timepoint = cellLabels)
# ggplot(tmp, aes(x = DC1, y = DC2, colour = Timepoint)) +
# geom_point() + scale_color_tableau() +
# xlab("Diffusion component 1") +
# ylab("Diffusion component 2") +
# theme_classic()
# plotTSNE(FTE2, colour_by = "initial.cluster")